ROAD ACCIDENTS 2018

loading the dataset

# Read the 2018 road-accident table; text columns are loaded as factors.
df <- read.csv(
  file = "C:\\Users\\DELL\\Downloads\\accidents_18.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)

loading the packages

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.4
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.5
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Data is taken from

From this dataset we are interested only in the Accidents & Population (2018) figures for all STATES/UT, which are further grouped into REGIONS.

### data manipulation

# Keep the region, state, population and accident-count columns and derive
# the number of accidents per lakh (100,000) of population.
accidents1 <- df %>%
  select(Region, States_UT, Population_2018, Accidents) %>%
  mutate(per_lakh = Accidents / Population_2018 * 1e5)
  • we have used dplyr package to select and mutate the data
  • And have arrived at a new variable called ACCIDENTS (PER LAKH POPULATION)
  • We further use ggplot2 and plotly to visualize the data.

data visualization

# Scatter plot of accident counts (x) against 2018 population (y) with a
# fitted linear trend line.
acc_point <- ggplot(
  data = accidents1,
  mapping = aes(x = Accidents, y = Population_2018)
) +
  geom_point(colour = "red", size = 6) +
  geom_smooth(method = "lm") +
  labs(
    title = "Accidents vs Population(2018)",
    x = "ACCIDENTS",
    y = "POPULATION"
  ) +
  theme_classic()
print(acc_point)
## `geom_smooth()` using formula 'y ~ x'

# Pearson correlation between population and accident counts.
with(accidents1, cor(Population_2018, Accidents, method = "pearson"))
## [1] 0.9136955

* The above scatter plot tells us that there is a linear relationship between Population and Accidents.
* When the population increases, the accidents also increase.
* Therefore we cannot conclude that THE STATE WITH THE MOST ACCIDENTS IS THE STATE WITH THE HIGHEST ACCIDENT RATE.
* Therefore we use the accidents per one lakh population to find the state with the highest accident rate.

as we all know there is always a linear relationship between accidents & population

# Scatter plot of accident counts (x) against accidents per lakh population
# (y), with ggplot2's default smoother.
acc_point_1 <- ggplot(
  data = accidents1,
  mapping = aes(x = Accidents, y = per_lakh)
) +
  geom_point(colour = "red", size = 6) +
  geom_smooth() +
  labs(
    title = "ACCIDENTS per one lakh population",
    x = "ACCIDENTS",
    y = "PER LAKH"
  ) +
  theme_light()
print(acc_point_1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Pearson correlation between accident counts and the per-lakh rate.
with(accidents1, cor(Accidents, per_lakh, method = "pearson"))
## [1] 0.4034025
  • From the above plot we come to know that per lakh population is not linear to the accidents.
  • Keeping this we can arrive at knowing the states/UT with high accidents rate.
# Histogram of accident counts with vertical reference lines at the mean
# (blue) and the median (green).
# Colour and line width are fixed parameters, so they are set outside aes():
# inside aes() the strings 'blue'/'green' are treated as category labels,
# which makes ggplot2 pick its default palette and add a spurious legend.
acc_hist <- ggplot(accidents1, aes(x = Accidents)) +
  geom_histogram(
    bins = 20, color = "red", fill = "orange", linetype = "dashed"
  ) +
  geom_vline(xintercept = mean(accidents1$Accidents), color = "blue", lwd = 1) +
  geom_vline(
    xintercept = median(accidents1$Accidents), color = "green", lwd = 1
  ) +
  ggtitle("Histogram for Accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(acc_hist)

# Five-number summary plus mean of the accident counts.
summary(accidents1[["Accidents"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    20.0   473.5  1127.2  1477.8  7179.0
  • from the histogram and summary we come to know that the mean (1127.2) is greater than the median (473.5), so the accidents variable is positively skewed (RIGHT SKEWED).
  • from the histogram we come to know most of the states faced from 0-2000 accidents most frequently in 2018.
  • but we know that the accidents variable is not enough to arrive at a conclusion.
# Histogram of accidents per lakh population with vertical reference lines at
# the mean (green, thick) and the median (orange).
# Colour and line width are fixed parameters, so they are set outside aes():
# inside aes() the colour strings are treated as category labels, which makes
# ggplot2 pick its default palette and add a spurious legend.
acc_hist_pop <- ggplot(accidents1, aes(x = per_lakh)) +
  geom_histogram(bins = 20, color = "red") +
  geom_vline(xintercept = mean(accidents1$per_lakh), color = "green", lwd = 2) +
  geom_vline(xintercept = median(accidents1$per_lakh), color = "orange") +
  ggtitle("Histogram for accidents per lakh population") +
  xlab("PER LAKH POPULATION") +
  ylab("Count") +
  theme_dark()
plot(acc_hist_pop)

# Five-number summary plus mean of the per-lakh accident rate.
summary(accidents1[["per_lakh"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.220   2.723   2.820   4.184   7.370
  • From the above histogram and summary function we finally get that the accidents per lakh population is almost normally distributed.

To know the outliers of our data we have used the boxplot

# Box plot of accident counts per region; outliers are drawn as red
# triangles and each region's mean as a steel-blue point.
reg_acc_box <- ggplot(
  accidents1,
  aes(x = Region, y = Accidents, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for accidents in every Region") +
  xlab("REGION") +
  ylab("ACCIDENTS")
plot(reg_acc_box)

# List the rows that are outliers within their own region (same 1.5 * IQR
# rule the box plot uses). Outliers are detected per region: matching against
# the pooled `$out` vector of boxplot() would also flag a row whose value
# merely equals an outlier of a different region, and boxplot() would draw a
# second, redundant base-graphics plot. Original row names are not retained.
accidents1 %>%
  group_by(Region) %>%
  filter(Accidents %in% boxplot.stats(Accidents)$out) %>%
  ungroup() %>%
  as.data.frame()

##       Region            States_UT Population_2018 Accidents per_lakh
## 3  northeast                Assam        31205576       681 2.182302
## 32        UT Dadra & Nagar Haveli          343709        22 6.400763
  • From the above boxplot we come to know that ASSAM & Dadra & Nagar Haveli are the outliers but again looking at the linear relationship , We look into the accidents per lakh population.
# Box plot of accidents per lakh population per region; outliers are drawn as
# red triangles and each region's mean as a steel-blue point.
reg_perlakh_box <- ggplot(
  accidents1,
  aes(x = Region, y = per_lakh, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for accidents per lakh population  in every Region") +
  xlab("REGION") +
  ylab("per lakh pop.")
plot(reg_perlakh_box)

# List the rows that are outliers within their own region (same 1.5 * IQR
# rule the box plot uses). Outliers are detected per region: matching against
# the pooled `$out` vector of boxplot() would also flag a row whose value
# merely equals an outlier of a different region, and boxplot() would draw a
# second, redundant base-graphics plot. Original row names are not retained.
accidents1 %>%
  group_by(Region) %>%
  filter(per_lakh %in% boxplot.stats(per_lakh)$out) %>%
  ungroup() %>%
  as.data.frame()

##       Region            States_UT Population_2018 Accidents per_lakh
## 23 northeast               Sikkim          610577        45 7.370078
## 32        UT Dadra & Nagar Haveli          343709        22 6.400763
  • From the above boxplot we conclude by saying that from the overall analysis of accidents in INDIA, we have the outliers as SIKKIM and Dadra & Nagar Haveli.
  • These two states/UTs have the highest accident rates per one lakh population.
  • If we replace the outliers with the mean, a more representative SUMMARY of accidents in INDIA can be found.

TO LOOK INTO THE TOP 10 AND BOTTOM 10 ACCIDENTS in INDIA.

# The ten states/UTs with the highest accidents-per-lakh rate, sorted from
# highest to lowest.
top_10_states_acc <- accidents1 %>%
  top_n(10, per_lakh) %>%
  arrange(desc(per_lakh))

# Bar chart of those ten states. Row order from arrange() does not influence
# a discrete axis (ggplot2 uses factor-level order), so the bars are ordered
# explicitly with reorder() to run from the highest rate to the lowest.
top10_acc_plt <- ggplot(
  top_10_states_acc,
  aes(x = reorder(States_UT, -per_lakh), y = per_lakh)
) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH HIGH ACCIDENTS RATE") +
  xlab("STATES/UT") +
  ylab("PER LAKH POPULATION") +
  theme_dark()
plot(top10_acc_plt)

* From the above barchart we get to know that these are States/UT with high accident rate.

# Bar chart of the ten states/UTs with the lowest non-zero accidents-per-lakh
# rate. Row order does not influence a discrete axis (ggplot2 uses
# factor-level order), so the bars are ordered explicitly with reorder() to
# run from the lowest rate upwards; the former arrange() step had no effect
# on the plot and is dropped.
bottom_10_states_acc <- accidents1 %>%
  filter(per_lakh != 0) %>%
  top_n(-10, per_lakh) %>%
  ggplot(aes(x = reorder(States_UT, per_lakh), y = per_lakh)) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH LOW ACCIDENTS RATE") +
  xlab("STATES/UT") +
  ylab("PER LAKH POPULATION") +
  theme_dark()
plot(bottom_10_states_acc)

* From the above barchart we conclude by saying that MEGHALAYA has the best accident rate among other states and SIKKIM has the worst rate of accidents in INDIA.

TRAFFIC VIOLATIONS

Now we use the traffic violations data to find the state wise rate for traffic violations
# Keep columns 2-3 and 6-15 of `df` (the same columns that remain after
# taking columns 2-15 and dropping the 3rd and 4th of that selection), then
# derive the kills per 10 accidents for each violation type.
violating_rules <- df %>%
  select(c(2:3, 6:15)) %>%
  mutate(
    overspeed_kill_per10_accidents =
      Overspeed_kill / Overspeed_accidents * 10,
    drunken_drive_kill_per10_accidents =
      Drunkendrive_kill / Drunkendrive_accidents * 10,
    wrongside_kill_per_10_accidents =
      Wrongside_kill / Wrongside_accidents * 10,
    redsignal_kill_per10_accidents =
      Redsignal_kill / Redsignal_accidents * 10,
    mobile_kill_per10_accidents =
      Mobile_kill / Mobile_accidents * 10
  )
# Per-state totals over the five violation types (accidents, kills, and the
# sum of the five kills-per-10-accidents rates), grouped by region.
violations <- violating_rules %>%
  mutate(
    total_accidents = Overspeed_accidents + Drunkendrive_accidents +
      Wrongside_accidents + Redsignal_accidents + Mobile_accidents,
    total_kills = Overspeed_kill + Drunkendrive_kill +
      Wrongside_kill + Redsignal_kill + Mobile_kill,
    per10_accidents = overspeed_kill_per10_accidents +
      drunken_drive_kill_per10_accidents +
      wrongside_kill_per_10_accidents +
      redsignal_kill_per10_accidents +
      mobile_kill_per10_accidents
  ) %>%
  select(Region, total_accidents, total_kills, per10_accidents) %>%
  group_by(Region)

# Regional average of the summed rate over states with a positive value,
# divided by 10. filter() also drops rows where the rate is NA/NaN.
violating_accidents <- violations %>%
  select(Region, per10_accidents) %>%
  filter(per10_accidents > 0) %>%
  group_by(Region) %>%
  summarise(KILL_PER10_ACCIDENTS = sum(per10_accidents) / n() / 10)
as.data.frame(violating_accidents)
##      Region KILL_PER10_ACCIDENTS
## 1   central            1.4862521
## 2      east            3.1587052
## 3     north            2.2632494
## 4 northeast            1.8069869
## 5     south            0.8661236
## 6        UT            1.0463237
## 7      west            1.5321885
  • From the above dataframe, we come to know the EASTERN REGION faces 3.1587052 kills per every 10 accidents.
  • And interestingly, though SOUTH INDIA faces a large number of accidents , it faces only 0.8661236 kills per every 10 accidents due to TRAFFIC VIOLATIONS
# Interactive bar chart of the regional figure computed above.
violation_bar <- violating_accidents %>%
  plot_ly(x = ~Region) %>%
  add_trace(
    y = ~KILL_PER10_ACCIDENTS,
    name = "ACCIDENTS (REGION_WISE)",
    type = "bar"
  )
violation_bar

# Interactive pie chart showing each region's share of the same figure.
violation_pie <- plot_ly(
  data = violating_accidents,
  labels = ~Region,
  values = ~KILL_PER10_ACCIDENTS,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
violation_pie

INSIGHTS

  • From the above EDA & Statistical Inferences
  • We come to know that the CENTRAL REGION of INDIA faces the highest number of accidents.
  • And EASTERN INDIA is facing more kills due to traffic violations.
  • This suggests two things:
    * PEOPLE ARE NOT FOLLOWING THE TRAFFIC RULES
    * TRAFFIC RULES ARE NOT ENFORCED STRICTLY

NOW LET'S LOOK DEEPER INTO THESE TWO SUGGESTIONS

Loading the dataset for traffic violations by people

# Read the per-violation accident table (text columns stay as read).
violation <- read.csv(
  file = "C:\\Users\\DELL\\Downloads\\violation.csv",
  header = TRUE
)

For our further analysis we are making region as a factor variable.

# Convert Region to a factor in place (column position is unchanged).
violation$Region <- factor(violation$Region)

Analysis for overspeeding

  • We are selecting only the data for overspeeding and creating a column for knowing kills for every 10 overspeeding accidents.
# Columns 2-8 of `violation` hold the overspeeding figures; add the number of
# kills per 10 overspeeding accidents.
overspeed <- violation %>%
  select(2:8) %>%
  mutate(kill_per_overspeed_acc = over_kill / over_acc * 10)
## histogram
# Histogram of overspeed accident counts with vertical reference lines at the
# mean (blue) and the median (green).
# Colour and line width are fixed parameters, so they are set outside aes():
# inside aes() the strings 'blue'/'green' are treated as category labels,
# which makes ggplot2 pick its default palette and add a spurious legend.
overspeed_hist <- ggplot(overspeed, aes(x = over_acc)) +
  geom_histogram(
    bins = 30, color = "red", fill = "orange", linetype = "dashed"
  ) +
  geom_vline(xintercept = mean(overspeed$over_acc), color = "blue", lwd = 1) +
  geom_vline(xintercept = median(overspeed$over_acc), color = "green", lwd = 1) +
  ggtitle("Histogram for overspeed accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_minimal()
plot(overspeed_hist)

# Five-number summary plus mean of the overspeed accident counts.
summary(overspeed[["over_acc"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0   186.2  2965.0  8628.1 13070.5 46113.0
  • the data is right skewed.
## outlier detection using boxplot
# Box plot of overspeed accident counts per region; outliers are drawn as red
# triangles and each region's mean as a steel-blue point.
overspeed_boxplot <- ggplot(
  overspeed,
  aes(x = Region, y = over_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for overspeed accidents  in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(overspeed_boxplot)

# List the rows that are outliers within their own region (same 1.5 * IQR
# rule the box plot uses). Outliers are detected per region: matching against
# the pooled `$out` vector of boxplot() would also flag a row whose value
# merely equals an outlier of a different region, and boxplot() would draw a
# second, redundant base-graphics plot. Original row names are not retained.
overspeed %>%
  group_by(Region) %>%
  filter(over_acc %in% boxplot.stats(over_acc)$out) %>%
  ungroup() %>%
  as.data.frame()

##       Region States_UTs over_acc over_kill over_gre_inj over_min_inj
## 3  northeast      Assam     2532       995         1264          172
## 22     north  Rajasthan    20132      9618         6160        13735
## 34        UT      Delhi     2866       748          401         2217
##    over_tot_inj kill_per_overspeed_acc
## 3          1436               3.929700
## 22        19895               4.777469
## 34         2618               2.609909
  • Outliers are ASSAM, RAJASTHAN, DELHI for overspeed accidents, as their accident counts lie beyond the range of their respective Regions.
## plot
# Scatter plot of overspeed accidents (x) against overspeed kills (y) with a
# fitted linear trend line.
overspeed_point <- ggplot(
  data = overspeed,
  mapping = aes(x = over_acc, y = over_kill)
) +
  geom_point(colour = "red", size = 6) +
  geom_smooth(method = "lm") +
  labs(title = "Scatter Plot for accidents", x = "ACCIDENTS", y = "KILL") +
  theme_classic()
print(overspeed_point)
## `geom_smooth()` using formula 'y ~ x'

# Pearson correlation between overspeed accidents and overspeed kills.
with(overspeed, cor(over_acc, over_kill, method = "pearson"))
## [1] 0.8647379
  • there is a good correlation between overspeed accidents and kills.
# Pearson correlation between total and grievous overspeed injuries.
with(overspeed, cor(over_tot_inj, over_gre_inj, method = "pearson"))
## [1] 0.6523911
# Pearson correlation between total and minor overspeed injuries.
with(overspeed, cor(over_tot_inj, over_min_inj, method = "pearson"))
## [1] 0.9426355
  • there is a strong correlation between total injuries and minor injuries
  • this tells us that most of the injuries that occurred were minor injuries.
## barplot
# Interactive bars of grievous, minor and total overspeed injuries per state.
overspeed_injuries <- overspeed %>%
  plot_ly(x = ~States_UTs) %>%
  add_trace(y = ~over_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~over_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~over_tot_inj, name = "Total Injury", type = "bar")
overspeed_injuries

# The same three injury series plotted against region.
overspeed_injuries_region <- overspeed %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~over_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~over_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~over_tot_inj, name = "Total Injury", type = "bar")
overspeed_injuries_region
# Bar chart of the ten states/UTs with the highest kills per 10 overspeed
# accidents, excluding rows where every accident was a kill.
# Row order does not influence a discrete axis (ggplot2 uses factor-level or
# alphabetical order), so the bars are ordered explicitly with reorder() from
# highest to lowest; the former arrange() step had no effect on the plot.
top10_overspeed_kill <- overspeed %>%
  filter(over_acc != over_kill) %>%
  top_n(10, kill_per_overspeed_acc) %>%
  ggplot(aes(
    x = reorder(States_UTs, -kill_per_overspeed_acc),
    y = kill_per_overspeed_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO overspeed") +
  xlab("STATES/UT") +
  ylab("overspeed KILLS") +
  theme_dark()
plot(top10_overspeed_kill)

# Bar chart of the ten states/UTs with the lowest kills per 10 overspeed
# accidents, excluding rows where every accident was a kill.
# Row order does not influence a discrete axis (ggplot2 uses factor-level or
# alphabetical order), so the bars are ordered explicitly with reorder() from
# highest to lowest; the former arrange() step had no effect on the plot.
bottom10_overspeed_kill <- overspeed %>%
  filter(over_acc != over_kill) %>%
  top_n(-10, kill_per_overspeed_acc) %>%
  ggplot(aes(
    x = reorder(States_UTs, -kill_per_overspeed_acc),
    y = kill_per_overspeed_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO overspeed") +
  xlab("STATES/UT") +
  ylab("overspeed KILLS") +
  theme_dark()
plot(bottom10_overspeed_kill)

* The highest kill rate due to overspeeding is found in MIZORAM and the lowest in ANDAMAN & NICOBAR ISLANDS.
* States like PUNJAB & BIHAR are advised to look into this problem with high attention to reduce the count of kills.

# Regional average of the per-state overspeed kill rate, divided by 10.
# NOTE(review): the per-state rate is already kills per 10 accidents, so the
# extra division by 10 yields kills per single accident - confirm the unit
# intended by the `kill_per10_` column name.
overspeed_result <- overspeed %>%
  select(Region, kill_per_overspeed_acc) %>%
  group_by(Region) %>%
  summarise(
    kill_per10_overspeed_accidents = sum(kill_per_overspeed_acc) / n() / 10
  )
as.data.frame(overspeed_result)
##      Region kill_per10_overspeed_accidents
## 1   central                      0.4237846
## 2      east                      0.6071363
## 3     north                      0.4446250
## 4 northeast                      0.4479920
## 5     south                      0.2433911
## 6        UT                      0.4218447
## 7      west                      0.2990527
# Interactive bar chart of the regional overspeed figure.
overspeed_bar <- overspeed_result %>%
  plot_ly(x = ~Region) %>%
  add_trace(
    y = ~kill_per10_overspeed_accidents,
    name = " OVERSPEED ACCIDENTS (REGION_WISE)",
    type = "bar"
  )
overspeed_bar

# Interactive pie chart showing each region's share of the same figure.
overspeed_pie <- plot_ly(
  data = overspeed_result,
  labels = ~Region,
  values = ~kill_per10_overspeed_accidents,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
overspeed_pie
  • From the above visualizations, we come to know that EAST INDIA is facing 0.6071363 kills per 10 accidents due to overspeeding.
  • And 0.2433911 kills per 10 accidents due to overspeeding is the least faced by SOUTH INDIA.
  • The share % of each region in 2018 (kills per 10 overspeeding accidents) is given in the pie chart above.

### Suggestions

  • The states in East India such as BIHAR, WEST BENGAL, JHARKHAND & ODISHA must look into this seriously, as they, as a region, face more kills per every 10 accidents.

Analysis for drunken drive

# Keep columns 2-3 and 9-13 of `violation` (the same columns that remain
# after taking columns 2-13 and dropping the 3rd to 7th of that selection),
# then add the number of kills per 10 drunken-drive accidents.
drunkendrive <- violation %>%
  select(c(2:3, 9:13)) %>%
  mutate(kill_per_drunkendrive_acc = drunk_kill / drunk_acc * 10)
## histogram

# Histogram of drunken-drive accident counts with vertical reference lines at
# the mean (blue) and the median (green).
# Colour and line width are fixed parameters, so they are set outside aes():
# inside aes() the strings 'blue'/'green' are treated as category labels,
# which makes ggplot2 pick its default palette and add a spurious legend.
drunkendrive_hist <- ggplot(drunkendrive, aes(x = drunk_acc)) +
  geom_histogram(
    bins = 30, color = "red", fill = "orange", linetype = "dashed"
  ) +
  geom_vline(
    xintercept = mean(drunkendrive$drunk_acc), color = "blue", lwd = 1
  ) +
  geom_vline(
    xintercept = median(drunkendrive$drunk_acc), color = "green", lwd = 1
  ) +
  ggtitle("Histogram for drunkendrive accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(drunkendrive_hist)

# Five-number summary plus mean of the drunken-drive accident counts.
summary(drunkendrive[["drunk_acc"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    19.0   109.0   333.8   324.8  3595.0
  • the summary and the histogram tells us the data of accidents due to drunken drive is positively skewed.
  • the histogram says that many states face accidents due to drunken drive is between 0 to 600 frequently.
## outlier detection using boxplot
# Box plot of drunken-drive accident counts per region; outliers are drawn as
# red triangles and each region's mean as a steel-blue point.
drunkendrive_boxplot <- ggplot(
  drunkendrive,
  aes(x = Region, y = drunk_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for drunkendrive accidents  in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(drunkendrive_boxplot)

# List the rows that are outliers within their own region (same 1.5 * IQR
# rule the box plot uses). Outliers are detected per region: matching against
# the pooled `$out` vector of boxplot() would also flag a row whose value
# merely equals an outlier of a different region, and boxplot() would draw a
# second, redundant base-graphics plot. Original row names are not retained.
drunkendrive %>%
  group_by(Region) %>%
  filter(drunk_acc %in% boxplot.stats(drunk_acc)$out) %>%
  ungroup() %>%
  as.data.frame()

##       Region States_UTs drunk_acc drunk_kill drunk_gre_inj drunk_min_inj
## 3  northeast      Assam       377        130           270            85
## 34        UT      Delhi       333         72            30           262
##    drunk_tot_inj kill_per_drunkendrive_acc
## 3            355                  3.448276
## 34           292                  2.162162
  • The outliers for drunken-drive accidents are ASSAM & DELHI: their accident counts lie beyond the range of the other states in their respective regions.
  • by replacing the count into the mean values the exact mean is found.
## plot
# Scatter plot of drunken-drive accidents (x) against kills (y) with a fitted
# linear trend line.
drunkendrive_point <- ggplot(
  data = drunkendrive,
  mapping = aes(x = drunk_acc, y = drunk_kill)
) +
  geom_point(colour = "red", size = 6) +
  geom_smooth(method = "lm") +
  labs(title = "Scatter Plot for accidents", x = "ACCIDENTS", y = "KILL") +
  theme_classic()
print(drunkendrive_point)
## `geom_smooth()` using formula 'y ~ x'

# Pearson correlation between drunken-drive accidents and kills.
with(drunkendrive, cor(drunk_acc, drunk_kill, method = "pearson"))
## [1] 0.9314265
  • It is found that there is high correlation between accidents & kill due to drunkendrive.
  • Every time accidents increase, the kills also increase, which is alarming.
## barplot
# Interactive bars of grievous, minor and total drunken-drive injuries per
# state.
drunkendrive_injuries <- drunkendrive %>%
  plot_ly(x = ~States_UTs) %>%
  add_trace(y = ~drunk_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~drunk_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~drunk_tot_inj, name = "Total Injury", type = "bar")
drunkendrive_injuries

# The same three injury series plotted against region.
drunkendrive_injuries_region <- drunkendrive %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~drunk_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~drunk_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~drunk_tot_inj, name = "Total Injury", type = "bar")
drunkendrive_injuries_region
# Bar chart of the ten states/UTs with the highest kills per 10 drunken-drive
# accidents, excluding rows where accidents equal kills.
# Row order does not influence a discrete axis (ggplot2 uses factor-level or
# alphabetical order), so the bars are ordered explicitly with reorder() from
# highest to lowest; the former arrange() step had no effect on the plot.
top10_drunkendrive_kill <- drunkendrive %>%
  filter(drunk_acc != drunk_kill) %>%
  top_n(10, kill_per_drunkendrive_acc) %>%
  ggplot(aes(
    x = reorder(States_UTs, -kill_per_drunkendrive_acc),
    y = kill_per_drunkendrive_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO drunkendrive") +
  xlab("STATES/UT") +
  ylab("Drunkendrive KILLS") +
  theme_dark()
plot(top10_drunkendrive_kill)

# Bar chart of the ten states/UTs with the lowest non-zero kills per 10
# drunken-drive accidents.
# The filter keeps rows with at least one kill and more accidents than kills;
# that already implies `drunk_acc > 0` and `drunk_acc != drunk_kill`, so the
# redundant terms are dropped.
# Row order does not influence a discrete axis (ggplot2 uses factor-level or
# alphabetical order), so the bars are ordered explicitly with reorder() from
# highest to lowest; the former arrange() step had no effect on the plot.
bottom10_drunkendrive_kill <- drunkendrive %>%
  filter(drunk_kill > 0 & drunk_acc > drunk_kill) %>%
  top_n(-10, kill_per_drunkendrive_acc) %>%
  ggplot(aes(
    x = reorder(States_UTs, -kill_per_drunkendrive_acc),
    y = kill_per_drunkendrive_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO drunkendrive") +
  xlab("STATES/UT") +
  ylab("Drunkendrive KILLS") +
  theme_dark()
plot(bottom10_drunkendrive_kill)

* We conclude by saying that the states UTTARAKHAND & MIZORAM face more kills due to drunken driving, while ANDHRA PRADESH & WEST BENGAL are good at controlling drunken-drive accidents.
* The central region faces more accidents and, as we know that there is a linear relationship between accidents & kills, the same region is facing more INJURIES too.

# Regional average of the positive per-state drunken-drive kill rates,
# divided by 10. filter() also drops rows where the rate is NA/NaN.
# NOTE(review): the per-state rate is already kills per 10 accidents, so the
# extra division by 10 yields kills per single accident - confirm the unit
# intended by the `kill_per10_` column name.
drunkendrive_result <- drunkendrive %>%
  select(Region, kill_per_drunkendrive_acc) %>%
  filter(kill_per_drunkendrive_acc > 0) %>%
  group_by(Region) %>%
  summarise(
    kill_per10_drunkendrive_accidents =
      sum(kill_per_drunkendrive_acc) / n() / 10
  )
as.data.frame(drunkendrive_result)
##      Region kill_per10_drunkendrive_accidents
## 1   central                         0.4310937
## 2      east                         0.3663242
## 3     north                         0.4224040
## 4 northeast                         0.5689041
## 5     south                         0.1424085
## 6        UT                         0.1795367
## 7      west                         0.2697860
# Interactive bar chart of the regional drunken-drive figure.
drunkendrive_bar <- drunkendrive_result %>%
  plot_ly(x = ~Region) %>%
  add_trace(
    y = ~kill_per10_drunkendrive_accidents,
    name = " DRUNKENDRIVE ACCIDENTS (REGION_WISE)",
    type = "bar"
  )
drunkendrive_bar

# Interactive pie chart showing each region's share of the same figure.
drunkendrive_pie <- plot_ly(
  data = drunkendrive_result,
  labels = ~Region,
  values = ~kill_per10_drunkendrive_accidents,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
drunkendrive_pie

Analysis of wrongside driving accidents.

# Keep columns 2-3 and 14-18 of `violation` (the same columns that remain
# after taking columns 2-18 and dropping the 3rd to 12th of that selection),
# then add the number of kills per 10 wrong-side accidents.
wrongside <- violation %>%
  select(c(2:3, 14:18)) %>%
  mutate(kill_per_wrongside_acc = wrongside_kill / wrongside_acc * 10)
## histogram

# Histogram of wrong-side accident counts with vertical reference lines at
# the mean (blue) and the median (green).
# Colour and line width are fixed parameters, so they are set outside aes():
# inside aes() the strings 'blue'/'green' are treated as category labels,
# which makes ggplot2 pick its default palette and add a spurious legend.
wrongside_hist <- ggplot(wrongside, aes(x = wrongside_acc)) +
  geom_histogram(
    bins = 30, color = "red", fill = "orange", linetype = "dashed"
  ) +
  geom_vline(
    xintercept = mean(wrongside$wrongside_acc), color = "blue", lwd = 1
  ) +
  geom_vline(
    xintercept = median(wrongside$wrongside_acc), color = "green", lwd = 1
  ) +
  ggtitle("Histogram for Wrongside accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(wrongside_hist)

# Five-number summary plus mean of the wrong-side accident counts.
summary(wrongside[["wrongside_acc"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0    22.0   235.0   688.4  1055.0  4572.0
## outlier detection using boxplot
# Box plot of wrong-side accident counts per region; outliers are drawn as
# red triangles and each region's mean as a steel-blue point.
wrongside_boxplot <- ggplot(
  wrongside,
  aes(x = Region, y = wrongside_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for wrongside accidents  in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(wrongside_boxplot)

# List the rows that are outliers within their own region (same 1.5 * IQR
# rule the box plot uses). Outliers are detected per region: matching against
# the pooled `$out` vector of boxplot() would also flag a row whose value
# merely equals an outlier of a different region, and boxplot() would draw a
# second, redundant base-graphics plot. Original row names are not retained.
wrongside %>%
  group_by(Region) %>%
  filter(wrongside_acc %in% boxplot.stats(wrongside_acc)$out) %>%
  ungroup() %>%
  as.data.frame()

##      Region States_UTs wrongside_acc wrongside_kill wrongside_gre_inj
## 3 northeast      Assam          1448            482               858
##   wrongside_min_inj wrongside_tot_inj kill_per_wrongside_acc
## 3               330              1188               3.328729
  • As we can see, there is one outlier in the northeast region, ASSAM, which the boxplot confirms.
## plot
# Scatter plot of wrong-side accidents (x) against kills (y) with a fitted
# linear trend line.
wrongside_point <- ggplot(
  data = wrongside,
  mapping = aes(x = wrongside_acc, y = wrongside_kill)
) +
  geom_point(colour = "red", size = 6) +
  geom_smooth(method = "lm") +
  labs(title = "Scatter Plot for accidents", x = "ACCIDENTS", y = "KILL") +
  theme_classic()
print(wrongside_point)
## `geom_smooth()` using formula 'y ~ x'

# Pearson correlation between wrong-side accidents and kills.
with(wrongside, cor(wrongside_acc, wrongside_kill, method = "pearson"))
## [1] 0.8914849
  • From the previous scatter plot we saw that the correlation for wrongside accidents & kills (0.8914849) is lower than the one for drunken-drive accidents (0.9314265).
  • From this we can assume that eventhough accidents increases, there is slightly lower increase in kills as compared to OVERSPEED & DRUNKEN DRIVE accidents.
## barplot
# Interactive bars of grievous, minor and total wrong-side injuries per
# state.
wrongside_injuries <- wrongside %>%
  plot_ly(x = ~States_UTs) %>%
  add_trace(y = ~wrongside_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~wrongside_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~wrongside_tot_inj, name = "Total Injury", type = "bar")
wrongside_injuries

# The same three injury series plotted against region.
wrongside_injuries_region <- wrongside %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~wrongside_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~wrongside_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~wrongside_tot_inj, name = "Total Injury", type = "bar")
wrongside_injuries_region
# Bar chart of the ten states/UTs with the highest kills per 10 wrong-side
# accidents, excluding rows where accidents equal kills.
# Row order does not influence a discrete axis (ggplot2 uses factor-level or
# alphabetical order), so the bars are ordered explicitly with reorder() from
# highest to lowest; the former arrange() step had no effect on the plot.
top10_wrongside_kill <- wrongside %>%
  filter(wrongside_acc != wrongside_kill) %>%
  top_n(10, kill_per_wrongside_acc) %>%
  ggplot(aes(
    x = reorder(States_UTs, -kill_per_wrongside_acc),
    y = kill_per_wrongside_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO Wrongside") +
  xlab("STATES/UT") +
  ylab("Wrongside KILLS") +
  theme_dark()
plot(top10_wrongside_kill)

# Bar chart of the ten states/UTs with the lowest non-zero kills per 10
# wrong-side accidents.
# The filter keeps rows with at least one kill and more accidents than kills;
# that already implies `wrongside_acc > 0` and
# `wrongside_acc != wrongside_kill`, so the redundant terms are dropped.
# Row order does not influence a discrete axis (ggplot2 uses factor-level or
# alphabetical order), so the bars are ordered explicitly with reorder() from
# highest to lowest; the former arrange() step had no effect on the plot.
bottom10_wrongside_kill <- wrongside %>%
  filter(wrongside_kill > 0 & wrongside_acc > wrongside_kill) %>%
  top_n(-10, kill_per_wrongside_acc) %>%
  ggplot(aes(
    x = reorder(States_UTs, -kill_per_wrongside_acc),
    y = kill_per_wrongside_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO wrongside") +
  xlab("STATES/UT") +
  ylab("wrongside KILLS") +
  theme_dark()
plot(bottom10_wrongside_kill)

* States like TAMIL NADU, UTTAR PRADESH & MADHYA PRADESH faces more number of accidents due to wrongside accidents. But as we saw in the scatter plot the accidents though are in large in number the kills are slightly increasing and as a result we see that it reflects in the injuries. * Accidents faced by the states TAMIL NADU & MADHYA PRADESH is high but the convincing part is most of them are found out to be the MINOR ACCIDENTS. * Which is not in the case of UTTAR PRADESH * We also found that the most of the state have less number of kills due to WRONGSIDE DRIVING. In this the highest kills is faced by DADRA & NAGAR HAVELI union territory and the lowest is in the state GOA.

  • we get to know that this accidents happen due to ONE WAY roads. And the states with high kills due to wrongside accidents must try to reduce the ONE WAY roads.
# Regional average of the positive per-state wrong-side kill rates, divided
# by 10. filter() also drops rows where the rate is NA/NaN.
# NOTE(review): the per-state rate is already kills per 10 accidents, so the
# extra division by 10 yields kills per single accident - confirm the unit
# intended by the `kill_per10_` column name.
wrongside_result <- wrongside %>%
  select(Region, kill_per_wrongside_acc) %>%
  filter(kill_per_wrongside_acc > 0) %>%
  group_by(Region) %>%
  summarise(
    kill_per10_wrongside_accidents = sum(kill_per_wrongside_acc) / n() / 10
  )
as.data.frame(wrongside_result)
##      Region kill_per10_wrongside_accidents
## 1   central                      0.4122998
## 2      east                      0.5664040
## 3     north                      0.4243812
## 4 northeast                      0.3721009
## 5     south                      0.1823934
## 6        UT                      0.7851541
## 7      west                      0.2552457
# Interactive bar chart of the regional wrong-side figure.
wrongside_bar <- wrongside_result %>%
  plot_ly(x = ~Region) %>%
  add_trace(
    y = ~kill_per10_wrongside_accidents,
    name = " WRONGSIDE DRIVING ACCIDENTS (REGION_WISE)",
    type = "bar"
  )
wrongside_bar

# Interactive pie chart showing each region's share of the same figure.
wrongside_pie <- plot_ly(
  data = wrongside_result,
  labels = ~Region,
  values = ~kill_per10_wrongside_accidents,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
wrongside_pie

Analysis of JUMPING RED SIGNAL ACCIDENTS.

# Keep columns 2-3 and 19-23 of `violation` (the same columns that remain
# after taking columns 2-23 and dropping the 3rd to 17th of that selection),
# then add the number of kills per 10 red-signal accidents.
redsignal <- violation %>%
  select(c(2:3, 19:23)) %>%
  mutate(kill_per_redsignal_acc = redsig_kill / redsig_acc * 10)
## histogram

# Histogram of red-signal accident counts with vertical reference lines at
# the mean (blue) and the median (green).
# Colour and line width are fixed parameters, so they are set outside aes():
# inside aes() the strings 'blue'/'green' are treated as category labels,
# which makes ggplot2 pick its default palette and add a spurious legend.
redsignal_hist <- ggplot(redsignal, aes(x = redsig_acc)) +
  geom_histogram(
    bins = 30, color = "red", fill = "orange", linetype = "dashed"
  ) +
  geom_vline(xintercept = mean(redsignal$redsig_acc), color = "blue", lwd = 1) +
  geom_vline(
    xintercept = median(redsignal$redsig_acc), color = "green", lwd = 1
  ) +
  ggtitle("Histogram for jumping redsignal accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(redsignal_hist)

# Five-number summary plus mean of the red-signal accident counts.
summary(redsignal[["redsig_acc"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0    41.5   123.4   111.2   939.0
  • From summary and histogram we find that the data is a right skewed.
  • On average, 123.3611111 accidents per state/UT occurred due to jumping red signals across India in 2018.
## outlier detection using boxplot
# Box plot of red-signal accident counts per region; outliers are drawn as
# red triangles and each region's mean as a steel-blue point.
redsignal_boxplot <- ggplot(
  redsignal,
  aes(x = Region, y = redsig_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for jumping redsignal accidents  in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(redsignal_boxplot)

# List the rows that are outliers within their own region (same 1.5 * IQR
# rule the box plot uses). Outliers are detected per region: matching against
# the pooled `$out` vector of boxplot() would also flag a row whose value
# merely equals an outlier of a different region, and boxplot() would draw a
# second, redundant base-graphics plot. Original row names are not retained.
redsignal %>%
  group_by(Region) %>%
  filter(redsig_acc %in% boxplot.stats(redsig_acc)$out) %>%
  ungroup() %>%
  as.data.frame()

##       Region States_UTs redsig_acc redsig_kill redsig_gre_inj redsig_min_inj
## 3  northeast      Assam        159          62            120             29
## 24     south Tamil Nadu        939         290            117            882
## 34        UT      Delhi        658         176             76            507
##    redsig_tot_inj kill_per_redsignal_acc
## 3             149               3.899371
## 24            999               3.088392
## 34            583               2.674772
  • We find the outliers to be ASSAM, TAMIL NADU & DELHI in the NORTH EAST, SOUTH & UT Regions respectively.
  • And these states have more accidents compared to others in their respective regions which is not good for the states.
## plot
# Scatter plot of red-signal accidents (x) against kills (y) with a fitted
# linear trend line.
redsignal_point <- ggplot(
  data = redsignal,
  mapping = aes(x = redsig_acc, y = redsig_kill)
) +
  geom_point(colour = "red", size = 6) +
  geom_smooth(method = "lm") +
  labs(title = "Scatter Plot for accidents", x = "ACCIDENTS", y = "KILL") +
  theme_classic()
print(redsignal_point)
## `geom_smooth()` using formula 'y ~ x'

# Pearson correlation between red-signal accidents and kills.
with(redsignal, cor(redsig_acc, redsig_kill, method = "pearson"))
## [1] 0.9581153
  • There is a high correlation that accidents increases the kills also increases.
## barplot
# Interactive bars of grievous, minor and total red-signal injuries per
# state.
redsignal_injuries <- redsignal %>%
  plot_ly(x = ~States_UTs) %>%
  add_trace(y = ~redsig_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~redsig_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~redsig_tot_inj, name = "Total Injury", type = "bar")
redsignal_injuries

# The same three injury series plotted against region.
redsignal_injuries_region <- redsignal %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~redsig_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~redsig_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~redsig_tot_inj, name = "Total Injury", type = "bar")
redsignal_injuries_region
# Bar chart of the ten states/UTs with the highest kills per 10 red-signal
# accidents, excluding rows where accidents equal kills.
# Row order does not influence a discrete axis (ggplot2 uses factor-level or
# alphabetical order), so the bars are ordered explicitly with reorder() from
# highest to lowest; the former arrange() step had no effect on the plot.
top10_redsignal_kill <- redsignal %>%
  filter(redsig_acc != redsig_kill) %>%
  top_n(10, kill_per_redsignal_acc) %>%
  ggplot(aes(
    x = reorder(States_UTs, -kill_per_redsignal_acc),
    y = kill_per_redsignal_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO Jumping Redsignal") +
  xlab("STATES/UT") +
  ylab("Redsignal KILLS") +
  theme_dark()
plot(top10_redsignal_kill)

# Bar chart of the ten states/UTs with the lowest non-zero kills per 10
# red-signal accidents.
# The filter keeps rows with at least one kill and more accidents than kills;
# that already implies `redsig_acc > 0` and `redsig_acc != redsig_kill`, so
# the redundant terms are dropped.
# Row order does not influence a discrete axis (ggplot2 uses factor-level or
# alphabetical order), so the bars are ordered explicitly with reorder() from
# highest to lowest; the former arrange() step had no effect on the plot.
bottom10_redsignal_kill <- redsignal %>%
  filter(redsig_kill > 0 & redsig_acc > redsig_kill) %>%
  top_n(-10, kill_per_redsignal_acc) %>%
  ggplot(aes(
    x = reorder(States_UTs, -kill_per_redsignal_acc),
    y = kill_per_redsignal_acc
  )) +
  geom_bar(
    stat = "identity", color = "red", fill = "orange", linetype = "dashed"
  ) +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO Redsignal") +
  xlab("STATES/UT") +
  ylab("Redsignal KILLS") +
  theme_dark()
plot(bottom10_redsignal_kill)

* In TAMIL NADU there have been more accidents due to jumping red signals, but most of them were minor.
* Due to the high number of accidents in TAMIL NADU, South India seems to be the region with a high percentage of cases.
* But it is CENTRAL INDIA which causes a lot of accidents by jumping red signals.

  • States like MEGHALAYA, PUNJAB & JHARKHAND peak with a high kill rate for every 10 accidents due to jumping the red signal, while ANDHRA PRADESH & KARNATAKA are the two states with the lowest kill rate for every 10 accidents due to jumping the red signal.
# Per Region: average of the positive kill_per_redsignal_acc values, divided
# by 10.
# NOTE(review): the accompanying text describes kill_per_redsignal_acc as a
# per-10-accidents rate; if so, the extra /10 yields a per-accident figure
# even though the column is named "per10" - confirm.
redsignal_result <- redsignal %>%
  select(c("Region", "kill_per_redsignal_acc")) %>%
  filter(kill_per_redsignal_acc > 0) %>%
  group_by(Region) %>%
  summarise(
    kill_per10_redsignal_accidents = (sum(kill_per_redsignal_acc) / n()) / 10
  )
data.frame(redsignal_result)
##      Region kill_per10_redsignal_accidents
## 1   central                      0.2700378
## 2      east                      0.5403756
## 3     north                      0.4376152
## 4 northeast                      0.4466457
## 5     south                      0.1298038
## 6        UT                      0.1732170
## 7      west                      0.2280415
# Region-wise bar chart of the summarised red-signal kill rate.
redsignal_bar <- redsignal_result %>%
  plot_ly(x = ~Region) %>%
  add_trace(
    y = ~kill_per10_redsignal_accidents,
    name = " JUMPING REDSIGNAL ACCIDENTS (REGION_WISE)",
    type = "bar"
  )
redsignal_bar

# The same values as a pie chart, labelled with region and percentage.
redsignal_pie <- plot_ly(
  redsignal_result,
  labels = ~Region,
  values = ~kill_per10_redsignal_accidents,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
redsignal_pie

Analysis for accidents happened by using MOBILE while driving

# Keep columns 2-28 of `violation`, drop positions 3-22 of that subset, then
# add deaths per 10 accidents.
mobile <- violation %>%
  select(2:28) %>%
  select(-(3:22)) %>%
  mutate(kill_per_mobile_acc = (mobile_kill / mobile_acc) * 10)
## histogram

# Histogram of mobile_acc with reference lines at the mean (blue) and the
# median (green).
# Colour and width are fixed layer parameters here. Inside aes() the strings
# "blue"/"green" were mapped as data, so ggplot2 picked its own colours and
# added a legend. The title no longer carries the copy-pasted word "jumping".
mobile_hist <- ggplot(mobile, aes(x = mobile_acc)) +
  geom_histogram(bins = 30, color = "red", fill = "orange", linetype = "dashed") +
  geom_vline(xintercept = mean(mobile$mobile_acc, na.rm = TRUE),
             color = "blue", lwd = 1) +
  geom_vline(xintercept = median(mobile$mobile_acc, na.rm = TRUE),
             color = "green", lwd = 1) +
  ggtitle("Histogram for mobile accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(mobile_hist)

# Summary statistics of the mobile accident counts.
summary(mobile[["mobile_acc"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0    27.5   251.1   201.2  3828.0
  • From the histogram and summary() we see that this is right-skewed data.
  • We also see that most states have fewer than 800 accidents.
## outlier detection using boxplot
# Boxplot of mobile_acc per Region; outliers are drawn as red triangles and
# each region's mean as a steel-blue point.
# The title no longer carries the copy-pasted word "jumping".
mobile_boxplot <- ggplot(mobile, aes(x = Region, y = mobile_acc, color = Region)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for mobile accidents in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(mobile_boxplot)

# Rows of `mobile` whose mobile_acc is a boxplot outlier within its own
# Region (boxplot.stats() default rule, the one boxplot() applies per group).
# Flagging per Region avoids listing rows from another region that merely
# share an outlier's count, and no redundant base plot is drawn.
mobile[ave(mobile$mobile_acc, mobile$Region,
           FUN = function(v) v %in% boxplot.stats(v)$out) == 1, ]

##       Region States_UTs mobile_acc mobile_kill mobile_gre_inj mobile_min_inj
## 3  northeast      Assam        112          22             77             31
## 24     south Tamil Nadu       1477         252            631            930
## 34        UT      Delhi        244          37             74            151
##    mobile_tot_inj kill_per_mobile_acc
## 3             108            1.964286
## 24           1561            1.706161
## 34            225            1.516393
  • The outliers seen in the histogram and boxplot are the states ASSAM, TAMIL NADU & DELHI, each within its respective region.
## plot
# Scatter of deaths against accidents with a fitted linear trend line.
mobile_point <- ggplot(mobile, aes(x = mobile_acc, y = mobile_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  labs(title = "Scatter Plot for accidents", x = "ACCIDENTS", y = "KILL") +
  theme_classic()
plot(mobile_point)
## `geom_smooth()` using formula 'y ~ x'

# Pearson correlation between mobile accident counts and death counts.
with(mobile, cor(mobile_acc, mobile_kill, method = "pearson"))
## [1] 0.9650849
  • The correlation is high: as accidents increase, the number of deaths is very likely to increase too.
## barplot
# One bar trace each for grievous, minor and total injuries, per state/UT.
mobile_injuries <- mobile %>%
  plot_ly(x = ~States_UTs) %>%
  add_trace(y = ~mobile_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~mobile_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~mobile_tot_inj, name = "Total Injury", type = "bar")
mobile_injuries

# The same three injury series with Region on the x axis.
mobile_injuries_region <- mobile %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~mobile_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~mobile_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~mobile_tot_inj, name = "Total Injury", type = "bar")
mobile_injuries_region
# Ten states/UTs with the highest deaths per 10 mobile-use accidents, after
# dropping rows where kills equal accidents.
# The title and y label previously kept red-signal wording from a copied
# block. reorder() sorts the bars by value; arrange() does not affect a
# discrete ggplot axis.
top10_mobile_kill <- mobile %>%
  filter(mobile_acc != mobile_kill) %>%
  top_n(10, kill_per_mobile_acc) %>%
  ggplot(aes(x = reorder(States_UTs, -kill_per_mobile_acc),
             y = kill_per_mobile_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO Using Mobile") +
  xlab("STATES/UT") +
  ylab("Mobile KILLS") +
  theme_dark()
plot(top10_mobile_kill)

# Ten states/UTs with the lowest deaths per 10 mobile-use accidents among
# rows with at least one death and more accidents than deaths (these two
# conditions imply the original `acc != kill` and `acc > 0` terms).
# The y label previously said "Redsignal". reorder() sorts the bars by value.
bottom10_mobile_kill <- mobile %>%
  filter(mobile_kill > 0, mobile_acc > mobile_kill) %>%
  top_n(-10, kill_per_mobile_acc) %>%
  ggplot(aes(x = reorder(States_UTs, -kill_per_mobile_acc),
             y = kill_per_mobile_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO mobile") +
  xlab("STATES/UT") +
  ylab("Mobile KILLS") +
  theme_dark()
plot(bottom10_mobile_kill)

* Most of the injuries due to using mobiles while driving have occurred in UTTAR PRADESH, and most of them are grievous injuries. * But the state of WEST BENGAL has the highest kill rate per 10 accidents that occur due to using a mobile while driving. * Meanwhile, MANIPUR & ANDHRA PRADESH have done well, with a low kill rate.

# Per Region: average of the positive kill_per_mobile_acc values, divided
# by 10.
# NOTE(review): kill_per_mobile_acc is already deaths per 10 accidents, so
# the extra /10 gives a per-accident figure even though the column is named
# "per10" - confirm this is intended.
mobile_result <- mobile %>%
  select(c("Region", "kill_per_mobile_acc")) %>%
  filter(kill_per_mobile_acc > 0) %>%
  group_by(Region) %>%
  summarise(
    kill_per10_mobile_accidents = (sum(kill_per_mobile_acc) / n()) / 10
  )
data.frame(mobile_result)
##      Region kill_per10_mobile_accidents
## 1   central                   0.2830170
## 2      east                   1.0784650
## 3     north                   0.5520110
## 4 northeast                   0.4428547
## 5     south                   0.1681268
## 6        UT                   0.1516393
## 7      west                   0.3503197
# Region-wise bar chart of the summarised mobile-use kill rate.
mobile_bar <- mobile_result %>%
  plot_ly(x = ~Region) %>%
  add_trace(
    y = ~kill_per10_mobile_accidents,
    name = " USING MOBILE ACCIDENTS (REGION_WISE)",
    type = "bar"
  )
mobile_bar

# The same values as a pie chart, labelled with region and percentage.
mobile_pie <- plot_ly(
  mobile_result,
  labels = ~Region,
  values = ~kill_per10_mobile_accidents,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent"
)
mobile_pie
### descriptive statistics

# One row per traffic violation with accident, death and injury totals,
# each summed over all rows of `violation`.
ACCIDENTS <- data.frame(
  traffic_violations = c(
    "Overspeeding", "Drunken Drive", "Wrongside Driving",
    "Jumping Redsignal", "Using Mobile"
  ),
  total_accidents = c(
    sum(violation$over_acc),
    sum(violation$drunk_acc),
    sum(violation$wrongside_acc),
    sum(violation$redsig_acc),
    sum(violation$mobile_acc)
  ),
  total_kills = c(
    sum(violation$over_kill),
    sum(violation$drunk_kill),
    sum(violation$wrongside_kill),
    sum(violation$redsig_kill),
    sum(violation$mobile_kill)
  ),
  total_injuries = c(
    sum(violation$over_tot_inj),
    sum(violation$drunk_tot_inj),
    sum(violation$wrongside_tot_inj),
    sum(violation$redsig_tot_inj),
    sum(violation$mobile_tot_inj)
  )
)
ACCIDENTS
##   traffic_violations total_accidents total_kills total_injuries
## 1       Overspeeding          310612       97588         316421
## 2      Drunken Drive           12018        4188           9944
## 3  Wrongside Driving           24781        8764          24100
## 4  Jumping Redsignal            4441        1545           4126
## 5       Using Mobile            9039        3707           7878
# Convert the violation label to a factor, then show the structure.
ACCIDENTS$traffic_violations <- factor(ACCIDENTS$traffic_violations)
str(ACCIDENTS)
## 'data.frame':    5 obs. of  4 variables:
##  $ traffic_violations: Factor w/ 5 levels "Drunken Drive",..: 3 1 5 2 4
##  $ total_accidents   : int  310612 12018 24781 4441 9039
##  $ total_kills       : int  97588 4188 8764 1545 3707
##  $ total_injuries    : int  316421 9944 24100 4126 7878
# Column-wise summary of the per-violation totals.
summary(object = ACCIDENTS)
##          traffic_violations total_accidents   total_kills    total_injuries  
##  Drunken Drive    :1        Min.   :  4441   Min.   : 1545   Min.   :  4126  
##  Jumping Redsignal:1        1st Qu.:  9039   1st Qu.: 3707   1st Qu.:  7878  
##  Overspeeding     :1        Median : 12018   Median : 4188   Median :  9944  
##  Using Mobile     :1        Mean   : 72178   Mean   :23158   Mean   : 72494  
##  Wrongside Driving:1        3rd Qu.: 24781   3rd Qu.: 8764   3rd Qu.: 24100  
##                             Max.   :310612   Max.   :97588   Max.   :316421
# Pearson correlation between total accidents and total deaths across the
# five violation types.
with(ACCIDENTS, cor(total_accidents, total_kills, method = "pearson"))
## [1] 0.9999653
# Accidents, deaths and injuries per violation as three bar traces.
acc_bar <- ACCIDENTS %>%
  plot_ly(x = ~traffic_violations) %>%
  add_trace(y = ~total_accidents, name = "ACCIDENTS", type = "bar") %>%
  add_trace(y = ~total_kills, name = "DEATHS", type = "bar") %>%
  add_trace(y = ~total_injuries, name = "INJURY", type = "bar")
acc_bar

# Share of each violation in total accidents.
acc_pie <- plot_ly(
  ACCIDENTS,
  labels = ~traffic_violations, values = ~total_accidents,
  type = "pie", textposition = "inside", textinfo = "label+percent"
)
acc_pie

# Share of each violation in total deaths.
acc_kill_pie <- plot_ly(
  ACCIDENTS,
  labels = ~traffic_violations, values = ~total_kills,
  type = "pie", textposition = "inside", textinfo = "label+percent"
)
acc_kill_pie

# Share of each violation in total injuries.
acc_inj_pie <- plot_ly(
  ACCIDENTS,
  labels = ~traffic_violations, values = ~total_injuries,
  type = "pie", textposition = "inside", textinfo = "label+percent"
)
acc_inj_pie
# Load the traffic-control data set and store Region as a factor.
# NOTE(review): the path is an absolute location on one machine.
traffic <- read.csv("C:\\Users\\DELL\\Downloads\\traffic.csv", header = TRUE)
traffic$Region <- factor(traffic$Region)
#### Traffic signal controlled

# Columns 2-8 of `traffic`, plus deaths per 10 accidents.
trafficSignal <- traffic %>%
  select(2:8) %>%
  mutate(kill_per_trafficsignal_acc = (tra_sig_kill / tra_sig_acc) * 10)
## histogram
# Histogram of tra_sig_acc with reference lines at the mean (blue) and the
# median (green).
# Colour and width are fixed layer parameters here. Inside aes() the strings
# "blue"/"green" were mapped as data, so ggplot2 picked its own colours and
# added a legend.
trafficSignal_hist <- ggplot(trafficSignal, aes(x = tra_sig_acc)) +
  geom_histogram(bins = 30, color = "red", fill = "orange", linetype = "dashed") +
  geom_vline(xintercept = mean(trafficSignal$tra_sig_acc, na.rm = TRUE),
             color = "blue", lwd = 1) +
  geom_vline(xintercept = median(trafficSignal$tra_sig_acc, na.rm = TRUE),
             color = "green", lwd = 1) +
  ggtitle("Histogram for traffic signal controlled  accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(trafficSignal_hist)

## outlier detection using boxplot
# Boxplot of tra_sig_acc per Region; outliers are red triangles and each
# region's mean is a steel-blue point.
trafficSignal_boxplot <- ggplot(
  trafficSignal,
  aes(x = Region, y = tra_sig_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  labs(
    title = "Boxplot for traffic signal controlled accidents  in every Region",
    x = "REGION",
    y = "Count"
  )
plot(trafficSignal_boxplot)

# Rows of `trafficSignal` whose tra_sig_acc is a boxplot outlier within its
# own Region (boxplot.stats() default rule, as boxplot() applies per group).
# Flagging per Region avoids listing rows from another region that merely
# share an outlier's count, and no redundant base plot is drawn.
trafficSignal[ave(trafficSignal$tra_sig_acc, trafficSignal$Region,
                  FUN = function(v) v %in% boxplot.stats(v)$out) == 1, ]

##       Region States_Uts tra_sig_acc tra_sig_kill tra_sig_gre_inj
## 3  northeast      Assam         173           34             106
## 24     south Tamil Nadu        2965          561             462
## 34        UT      Delhi         699          143             129
##    tra_sig_min_inj tra_sig_tot_inj kill_per_trafficsignal_acc
## 3               35             141                   1.965318
## 24            2853            3315                   1.892074
## 34             458             587                   2.045780
## plot
# Scatter of deaths against accidents with a fitted linear trend line.
trafficSignal_point <- ggplot(trafficSignal, aes(x = tra_sig_acc, y = tra_sig_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  labs(
    title = "Scatter Plot for traffic controlled accidents",
    x = "ACCIDENTS",
    y = "KILL"
  ) +
  theme_classic()
plot(trafficSignal_point)
## `geom_smooth()` using formula 'y ~ x'

## barplot
# One bar trace each for grievous, minor and total injuries, per state/UT.
trafficSignal_injuries <- trafficSignal %>%
  plot_ly(x = ~States_Uts) %>%
  add_trace(y = ~tra_sig_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~tra_sig_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~tra_sig_tot_inj, name = "Total Injury", type = "bar")
trafficSignal_injuries

# The same three injury series with Region on the x axis.
trafficSignal_injuries_region <- trafficSignal %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~tra_sig_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~tra_sig_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~tra_sig_tot_inj, name = "Total Injury", type = "bar")
trafficSignal_injuries_region
# Ten states/UTs with the highest deaths per 10 accidents at signal-controlled
# places, after dropping rows where kills equal accidents.
# reorder() sorts the bars by value; arrange() does not affect a discrete
# ggplot axis.
top10_trafficSignal_kill <- trafficSignal %>%
  filter(tra_sig_acc != tra_sig_kill) %>%
  top_n(10, kill_per_trafficsignal_acc) %>%
  ggplot(aes(x = reorder(States_Uts, -kill_per_trafficsignal_acc),
             y = kill_per_trafficsignal_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH HIGH KILLS IN Traffic signal controlled places") +
  xlab("STATES/UT") +
  ylab("traffic signal control KILLS") +
  theme_dark()
plot(top10_trafficSignal_kill)

# Ten states/UTs with the lowest deaths per 10 accidents at signal-controlled
# places, among rows with at least one death and more accidents than deaths
# (these two conditions imply the original `acc != kill` and `acc > 0` terms).
# reorder() sorts the bars by value; arrange() does not affect a discrete axis.
bottom10_trafficSignal_kill <- trafficSignal %>%
  filter(tra_sig_kill > 0, tra_sig_acc > tra_sig_kill) %>%
  top_n(-10, kill_per_trafficsignal_acc) %>%
  ggplot(aes(x = reorder(States_Uts, -kill_per_trafficsignal_acc),
             y = kill_per_trafficsignal_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH LOW KILLS in traffic signal controlled places") +
  xlab("STATES/UT") +
  ylab("traffic signal control KILLS") +
  theme_dark()
plot(bottom10_trafficSignal_kill)

###########################################################################

# Keep columns 2-13 of `traffic`, drop positions 3-7 of that subset, then
# add deaths per 10 accidents.
policeControlled <- traffic %>%
  select(2:13) %>%
  select(-(3:7)) %>%
  mutate(kill_per_policeControlled_acc = (pol_kill / pol_acc) * 10)
## histogram

# Histogram of pol_acc with reference lines at the mean (blue) and the
# median (green).
# Colour and width are fixed layer parameters here. Inside aes() the strings
# "blue"/"green" were mapped as data, so ggplot2 picked its own colours and
# added a legend.
policeControlled_hist <- ggplot(policeControlled, aes(x = pol_acc)) +
  geom_histogram(bins = 30, color = "red", fill = "orange", linetype = "dashed") +
  geom_vline(xintercept = mean(policeControlled$pol_acc, na.rm = TRUE),
             color = "blue", lwd = 1) +
  geom_vline(xintercept = median(policeControlled$pol_acc, na.rm = TRUE),
             color = "green", lwd = 1) +
  ggtitle("Histogram for policeControlled accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(policeControlled_hist)

## oUtlier detection using boxplot
# Boxplot of pol_acc per Region; outliers are red triangles and each
# region's mean is a steel-blue point.
policeControlled_boxplot <- ggplot(
  policeControlled,
  aes(x = Region, y = pol_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  labs(
    title = "Boxplot for policeControlled accidents  in every Region",
    x = "REGION",
    y = "Count"
  )
plot(policeControlled_boxplot)

# Rows of `policeControlled` whose pol_acc is a boxplot outlier within its
# own Region (boxplot.stats() default rule, as boxplot() applies per group).
# Flagging per Region avoids listing rows from another region that merely
# share an outlier's count, and no redundant base plot is drawn.
policeControlled[ave(policeControlled$pol_acc, policeControlled$Region,
                     FUN = function(v) v %in% boxplot.stats(v)$out) == 1, ]

##      Region States_Uts pol_acc pol_kill pol_gre_inj pol_min_inj pol_tot_inj
## 3 northeast      Assam     183       37         133          47         180
##   kill_per_policeControlled_acc
## 3                      2.021858
## plot
# Scatter of deaths against accidents at police-controlled places, with a
# fitted linear trend line.
policeControlled_point <- ggplot(policeControlled, aes(x = pol_acc, y = pol_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  ggtitle("Scatter Plot for accidents") +
  xlab("ACCIDENTS") +
  ylab("KILL") +
  theme_classic()
# Draw the plot built above; the original call drew overspeed_point instead.
plot(policeControlled_point)
## `geom_smooth()` using formula 'y ~ x'

## barplot
# One bar trace each for grievous, minor and total injuries, per state/UT.
policeControlled_injuries <- policeControlled %>%
  plot_ly(x = ~States_Uts) %>%
  add_trace(y = ~pol_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~pol_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~pol_tot_inj, name = "Total Injury", type = "bar")
policeControlled_injuries

# The same three injury series with Region on the x axis.
policeControlled_injuries_region <- policeControlled %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~pol_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~pol_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~pol_tot_inj, name = "Total Injury", type = "bar")
policeControlled_injuries_region
# Ten states/UTs with the highest deaths per 10 accidents at police-controlled
# places, after dropping rows where kills equal accidents.
# reorder() sorts the bars by value; arrange() does not affect a discrete
# ggplot axis.
top10_policeControlled_kill <- policeControlled %>%
  filter(pol_acc != pol_kill) %>%
  top_n(10, kill_per_policeControlled_acc) %>%
  ggplot(aes(x = reorder(States_Uts, -kill_per_policeControlled_acc),
             y = kill_per_policeControlled_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO policeControlled") +
  xlab("STATES/Ut") +
  ylab("policeControlled KILLS") +
  theme_dark()
# Draw the plot built above; the original call drew top10_overspeed_kill.
plot(top10_policeControlled_kill)

# Ten states/UTs with the lowest deaths per 10 accidents at police-controlled
# places, among rows with at least one death and more accidents than deaths
# (these two conditions imply the original `acc != kill` and `acc > 0` terms).
# reorder() sorts the bars by value; arrange() does not affect a discrete axis.
bottom10_policeControlled_kill <- policeControlled %>%
  filter(pol_kill > 0, pol_acc > pol_kill) %>%
  top_n(-10, kill_per_policeControlled_acc) %>%
  ggplot(aes(x = reorder(States_Uts, -kill_per_policeControlled_acc),
             y = kill_per_policeControlled_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO policeControlled") +
  xlab("STATES/Ut") +
  ylab("policeControlled KILLS") +
  theme_dark()
plot(bottom10_policeControlled_kill)

# Keep columns 2-18 of `traffic`, drop positions 3-12 of that subset, then
# add deaths per 10 accidents.
stopsignal <- traffic %>%
  select(2:18) %>%
  select(-(3:12)) %>%
  mutate(kill_per_stopsignal_acc = (stop_kill / stop_acc) * 10)
## histogram

# Histogram of stop_acc with reference lines at the mean (blue) and the
# median (green).
# Colour and width are fixed layer parameters here. Inside aes() the strings
# "blue"/"green" were mapped as data, so ggplot2 picked its own colours and
# added a legend.
stopsignal_hist <- ggplot(stopsignal, aes(x = stop_acc)) +
  geom_histogram(bins = 30, color = "red", fill = "orange", linetype = "dashed") +
  geom_vline(xintercept = mean(stopsignal$stop_acc, na.rm = TRUE),
             color = "blue", lwd = 1) +
  geom_vline(xintercept = median(stopsignal$stop_acc, na.rm = TRUE),
             color = "green", lwd = 1) +
  ggtitle("Histogram for stopsignal accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(stopsignal_hist)

## outlier detection using boxplot
# Boxplot of stop_acc per Region; outliers are red triangles and each
# region's mean is a steel-blue point.
stopsignal_boxplot <- ggplot(
  stopsignal,
  aes(x = Region, y = stop_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  labs(
    title = "Boxplot for stopsignal accidents  in every Region",
    x = "REGION",
    y = "Count"
  )
plot(stopsignal_boxplot)

# Rows of `stopsignal` whose stop_acc is a boxplot outlier within its own
# Region (boxplot.stats() default rule, as boxplot() applies per group).
# Flagging per Region avoids listing rows from another region that merely
# share an outlier's count, and no redundant base plot is drawn.
stopsignal[ave(stopsignal$stop_acc, stopsignal$Region,
               FUN = function(v) v %in% boxplot.stats(v)$out) == 1, ]

##       Region States_Uts stop_acc stop_kill stop_gre_inj stop_min_inj
## 3  northeast      Assam      140        29           75           19
## 24     south Tamil Nadu      842       202          238          651
## 34        UT      Delhi      217        55           36          155
##    stop_tot_inj kill_per_stopsignal_acc
## 3            94                2.071429
## 24          889                2.399050
## 34          191                2.534562
## plot
# Scatter of deaths against accidents with a fitted linear trend line.
stopsignal_point <- ggplot(stopsignal, aes(x = stop_acc, y = stop_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  labs(title = "Scatter Plot for accidents", x = "ACCIDENTS", y = "KILL") +
  theme_classic()
plot(stopsignal_point)
## `geom_smooth()` using formula 'y ~ x'

## barplot
# One bar trace each for grievous, minor and total injuries, per state/UT.
stopsignal_injuries <- stopsignal %>%
  plot_ly(x = ~States_Uts) %>%
  add_trace(y = ~stop_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~stop_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~stop_tot_inj, name = "Total Injury", type = "bar")
stopsignal_injuries

# The same three injury series with Region on the x axis.
stopsignal_injuries_region <- stopsignal %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~stop_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~stop_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~stop_tot_inj, name = "Total Injury", type = "bar")
stopsignal_injuries_region
# Ten states/UTs with the highest deaths per 10 stop-sign accidents, after
# dropping rows where kills equal accidents.
# reorder() sorts the bars by value; arrange() does not affect a discrete
# ggplot axis.
top10_stopsignal_kill <- stopsignal %>%
  filter(stop_acc != stop_kill) %>%
  top_n(10, kill_per_stopsignal_acc) %>%
  ggplot(aes(x = reorder(States_Uts, -kill_per_stopsignal_acc),
             y = kill_per_stopsignal_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO stopsignal") +
  xlab("STATES/UT") +
  ylab("stopsignal KILLS") +
  theme_dark()
plot(top10_stopsignal_kill)

# Ten states/UTs with the lowest deaths per 10 stop-sign accidents, among
# rows with at least one death and more accidents than deaths (these two
# conditions imply the original `acc != kill` and `acc > 0` terms).
# reorder() sorts the bars by value; arrange() does not affect a discrete axis.
bottom10_stopsignal_kill <- stopsignal %>%
  filter(stop_kill > 0, stop_acc > stop_kill) %>%
  top_n(-10, kill_per_stopsignal_acc) %>%
  ggplot(aes(x = reorder(States_Uts, -kill_per_stopsignal_acc),
             y = kill_per_stopsignal_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO stopsignal") +
  xlab("STATES/UT") +
  ylab("stopsignal KILLS") +
  theme_dark()
plot(bottom10_stopsignal_kill)

#####################################################################################################

# Keep columns 2-23 of `traffic`, drop positions 3-17 of that subset, then
# add deaths per 10 accidents.
blinker <- traffic %>%
  select(2:23) %>%
  select(-(3:17)) %>%
  mutate(kill_per_blinker_acc = (blinker_kill / blinker_acc) * 10)
## histogram

# Histogram of blinker_acc with reference lines at the mean (blue) and the
# median (green).
# Colour and width are fixed layer parameters here. Inside aes() the strings
# "blue"/"green" were mapped as data, so ggplot2 picked its own colours and
# added a legend.
blinker_hist <- ggplot(blinker, aes(x = blinker_acc)) +
  geom_histogram(bins = 30, color = "red", fill = "orange", linetype = "dashed") +
  geom_vline(xintercept = mean(blinker$blinker_acc, na.rm = TRUE),
             color = "blue", lwd = 1) +
  geom_vline(xintercept = median(blinker$blinker_acc, na.rm = TRUE),
             color = "green", lwd = 1) +
  ggtitle("Histogram for  blinker accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(blinker_hist)

## outlier detection using boxplot
# Boxplot of blinker_acc per Region; outliers are red triangles and each
# region's mean is a steel-blue point.
blinker_boxplot <- ggplot(
  blinker,
  aes(x = Region, y = blinker_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  labs(
    title = "Boxplot for  blinker accidents  in every Region",
    x = "REGION",
    y = "Count"
  )
plot(blinker_boxplot)

# Rows of `blinker` whose blinker_acc is a boxplot outlier within its own
# Region (boxplot.stats() default rule, as boxplot() applies per group).
# Flagging per Region avoids listing rows from another region that merely
# share an outlier's count, and no redundant base plot is drawn.
blinker[ave(blinker$blinker_acc, blinker$Region,
            FUN = function(v) v %in% boxplot.stats(v)$out) == 1, ]

##       Region States_Uts blinker_acc blinker_kill blinker_gre_inj
## 3  northeast      Assam         173           30              96
## 24     south Tamil Nadu        1499          264             287
## 34        UT      Delhi         320          157              37
##    blinker_min_inj blinker_tot_inj kill_per_blinker_acc
## 3               24             120             1.734104
## 24            1477            1764             1.761174
## 34             221             258             4.906250
## plot
# Scatter of deaths against accidents with a fitted linear trend line.
blinker_point <- ggplot(blinker, aes(x = blinker_acc, y = blinker_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  labs(title = "Scatter Plot for accidents", x = "ACCIDENTS", y = "KILL") +
  theme_classic()
plot(blinker_point)
## `geom_smooth()` using formula 'y ~ x'

## barplot
# One bar trace each for grievous, minor and total injuries, per state/UT.
blinker_injuries <- blinker %>%
  plot_ly(x = ~States_Uts) %>%
  add_trace(y = ~blinker_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~blinker_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~blinker_tot_inj, name = "Total Injury", type = "bar")
blinker_injuries

# The same three injury series with Region on the x axis.
blinker_injuries_region <- blinker %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~blinker_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~blinker_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~blinker_tot_inj, name = "Total Injury", type = "bar")
blinker_injuries_region
# Ten states/UTs with the highest deaths per 10 accidents at flashing
# signal/blinker places, after dropping rows where kills equal accidents.
# The title previously kept the word "Jumping" from the copied red-signal
# block. reorder() sorts the bars by value; arrange() does not affect a
# discrete ggplot axis.
top10_blinker_kill <- blinker %>%
  filter(blinker_acc != blinker_kill) %>%
  top_n(10, kill_per_blinker_acc) %>%
  ggplot(aes(x = reorder(States_Uts, -kill_per_blinker_acc),
             y = kill_per_blinker_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO blinker") +
  xlab("STATES/UT") +
  ylab("blinker KILLS") +
  theme_dark()
plot(top10_blinker_kill)

# Ten states/UTs with the lowest deaths per 10 blinker accidents, among rows
# with at least one death and more accidents than deaths (these two
# conditions imply the original `acc != kill` and `acc > 0` terms).
# reorder() sorts the bars by value; arrange() does not affect a discrete axis.
bottom10_blinker_kill <- blinker %>%
  filter(blinker_kill > 0, blinker_acc > blinker_kill) %>%
  top_n(-10, kill_per_blinker_acc) %>%
  ggplot(aes(x = reorder(States_Uts, -kill_per_blinker_acc),
             y = kill_per_blinker_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO blinker") +
  xlab("STATES/UT") +
  ylab("blinker KILLS") +
  theme_dark()
plot(bottom10_blinker_kill)

# Keep columns 2-28 of `traffic`, drop positions 3-22 of that subset, then
# add deaths per 10 accidents.
uncontrolled <- traffic %>%
  select(2:28) %>%
  select(-(3:22)) %>%
  mutate(kill_per_uncontrolled_acc = (uncont_kill / uncont_acc) * 10)
## histogram

# Histogram of uncont_acc with reference lines at the mean (blue) and the
# median (green).
# Colour and width are fixed layer parameters here. Inside aes() the strings
# "blue"/"green" were mapped as data, so ggplot2 picked its own colours and
# added a legend. The title no longer carries the copy-pasted word "jumping".
uncontrolled_hist <- ggplot(uncontrolled, aes(x = uncont_acc)) +
  geom_histogram(bins = 30, color = "red", fill = "orange", linetype = "dashed") +
  geom_vline(xintercept = mean(uncontrolled$uncont_acc, na.rm = TRUE),
             color = "blue", lwd = 1) +
  geom_vline(xintercept = median(uncontrolled$uncont_acc, na.rm = TRUE),
             color = "green", lwd = 1) +
  ggtitle("Histogram for uncontrolled accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(uncontrolled_hist)

## outlier detection using boxplot
# Boxplot of uncont_acc per Region; outliers are red triangles and each
# region's mean is a steel-blue point.
# The title no longer carries the copy-pasted word "jumping".
uncontrolled_boxplot <- ggplot(
  uncontrolled,
  aes(x = Region, y = uncont_acc, color = Region)
) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for uncontrolled accidents in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(uncontrolled_boxplot)

# Rows of `uncontrolled` whose uncont_acc is a boxplot outlier within its own
# Region (boxplot.stats() default rule, as boxplot() applies per group).
# Flagging per Region avoids listing rows from another region that merely
# share an outlier's count, and no redundant base plot is drawn.
uncontrolled[ave(uncontrolled$uncont_acc, uncontrolled$Region,
                 FUN = function(v) v %in% boxplot.stats(v)$out) == 1, ]

##       Region States_Uts uncont_acc uncont_kill uncont_gre_inj uncont_min_inj
## 3  northeast      Assam       2074         646           1303            232
## 22     north  Rajasthan       4576        2052           1531           2763
## 34        UT      Delhi       1943         516            352           1414
##    uncont_tot_inj kill_per_uncontrolled_acc
## 3            1535                  3.114754
## 22           4294                  4.484266
## 34           1766                  2.655687
## plot
# Scatter of deaths against accidents with a fitted linear trend line.
uncontrolled_point <- ggplot(uncontrolled, aes(x = uncont_acc, y = uncont_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  labs(title = "Scatter Plot for accidents", x = "ACCIDENTS", y = "KILL") +
  theme_classic()
plot(uncontrolled_point)
## `geom_smooth()` using formula 'y ~ x'

## barplot
# One bar trace each for grievous, minor and total injuries, per state/UT.
uncontrolled_injuries <- uncontrolled %>%
  plot_ly(x = ~States_Uts) %>%
  add_trace(y = ~uncont_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~uncont_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~uncont_tot_inj, name = "Total Injury", type = "bar")
uncontrolled_injuries

# The same three injury series with Region on the x axis.
uncontrolled_injuries_region <- uncontrolled %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~uncont_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~uncont_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~uncont_tot_inj, name = "Total Injury", type = "bar")
uncontrolled_injuries_region
# Ten states/UTs with the highest deaths per 10 accidents at uncontrolled
# places, after dropping rows where kills equal accidents.
# The title and y label previously kept red-signal wording from a copied
# block. reorder() sorts the bars by value; arrange() does not affect a
# discrete ggplot axis.
top10_uncontrolled_kill <- uncontrolled %>%
  filter(uncont_acc != uncont_kill) %>%
  top_n(10, kill_per_uncontrolled_acc) %>%
  ggplot(aes(x = reorder(States_Uts, -kill_per_uncontrolled_acc),
             y = kill_per_uncontrolled_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO uncontrolled") +
  xlab("STATES/UT") +
  ylab("uncontrolled KILLS") +
  theme_dark()
plot(top10_uncontrolled_kill)

# Ten states/UTs with the lowest deaths per 10 accidents at uncontrolled
# places, among rows with at least one death and more accidents than deaths
# (these two conditions imply the original `acc != kill` and `acc > 0` terms).
# The y label previously said "Redsignal". reorder() sorts the bars by value.
bottom10_uncontrolled_kill <- uncontrolled %>%
  filter(uncont_kill > 0, uncont_acc > uncont_kill) %>%
  top_n(-10, kill_per_uncontrolled_acc) %>%
  ggplot(aes(x = reorder(States_Uts, -kill_per_uncontrolled_acc),
             y = kill_per_uncontrolled_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO uncontrolled") +
  xlab("STATES/UT") +
  ylab("uncontrolled KILLS") +
  theme_dark()
plot(bottom10_uncontrolled_kill)

######################################################################################################

# Keep columns 2-33 of `traffic`, drop positions 3-27 of that subset, then
# add deaths per 10 accidents.
other <- traffic %>%
  select(2:33) %>%
  select(-(3:27)) %>%
  mutate(kill_per_other_acc = (other_kill / other_acc) * 10)
## histogram

# Histogram of other_acc with reference lines at the mean (blue) and the
# median (green).
# Colour and width are fixed layer parameters here. Inside aes() the strings
# "blue"/"green" were mapped as data, so ggplot2 picked its own colours and
# added a legend. The title no longer carries the copy-pasted word "jumping".
other_hist <- ggplot(other, aes(x = other_acc)) +
  geom_histogram(bins = 30, color = "red", fill = "orange", linetype = "dashed") +
  geom_vline(xintercept = mean(other$other_acc, na.rm = TRUE),
             color = "blue", lwd = 1) +
  geom_vline(xintercept = median(other$other_acc, na.rm = TRUE),
             color = "green", lwd = 1) +
  ggtitle("Histogram for other accidents") +
  xlab("Accidents") +
  ylab("Count") +
  theme_dark()
plot(other_hist)

## outlier detection using boxplot
# Boxplot of other_acc per Region; outliers are red triangles and each
# region's mean is a steel-blue point.
# The title no longer carries the copy-pasted word "jumping".
other_boxplot <- ggplot(other, aes(x = Region, y = other_acc, color = Region)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 2, outlier.size = 3) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "steelblue") +
  theme_classic() +
  ggtitle("Boxplot for other accidents in every Region") +
  xlab("REGION") +
  ylab("Count")
plot(other_boxplot)

# Rows of `other` whose other_acc is a boxplot outlier within its own Region
# (boxplot.stats() default rule, as boxplot() applies per group).
# Flagging per Region avoids listing rows from another region that merely
# share an outlier's count, and no redundant base plot is drawn.
other[ave(other$other_acc, other$Region,
          FUN = function(v) v %in% boxplot.stats(v)$out) == 1, ]

##       Region States_Uts other_acc other_kill other_gre_inj other_min_inj
## 3  northeast      Assam      5505       2190          4251          1054
## 34        UT      Delhi      3142        783           510          2594
##    other_tot_inj kill_per_other_acc
## 3           5305           3.978202
## 34          3104           2.492043
## plot
# Scatter of deaths against accidents with a fitted linear trend line.
other_point <- ggplot(other, aes(x = other_acc, y = other_kill)) +
  geom_point(color = "red", size = 6) +
  geom_smooth(method = lm) +
  labs(title = "Scatter Plot for accidents", x = "ACCIDENTS", y = "KILL") +
  theme_classic()
plot(other_point)
## `geom_smooth()` using formula 'y ~ x'

## barplot
# One bar trace each for grievous, minor and total injuries, per state/UT.
other_injuries <- other %>%
  plot_ly(x = ~States_Uts) %>%
  add_trace(y = ~other_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~other_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~other_tot_inj, name = "Total Injury", type = "bar")
other_injuries

# The same three injury series with Region on the x axis.
other_injuries_region <- other %>%
  plot_ly(x = ~Region) %>%
  add_trace(y = ~other_gre_inj, name = "Greviously", type = "bar") %>%
  add_trace(y = ~other_min_inj, name = "Minor", type = "bar") %>%
  add_trace(y = ~other_tot_inj, name = "Total Injury", type = "bar")
other_injuries_region
# Ten states/UTs with the highest deaths per 10 accidents in the "other"
# traffic-control category, after dropping rows where kills equal accidents.
# The title and y label previously kept red-signal wording from a copied
# block. reorder() sorts the bars by value; arrange() does not affect a
# discrete ggplot axis.
top10_other_kill <- other %>%
  filter(other_acc != other_kill) %>%
  top_n(10, kill_per_other_acc) %>%
  ggplot(aes(x = reorder(States_Uts, -kill_per_other_acc),
             y = kill_per_other_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH HIGH KILLS DUE TO other") +
  xlab("STATES/UT") +
  ylab("other KILLS") +
  theme_dark()
plot(top10_other_kill)

# Ten states/UTs with the lowest deaths per 10 accidents in the "other"
# category, among rows with at least one death and more accidents than deaths
# (these two conditions imply the original `acc != kill` and `acc > 0` terms).
# The y label previously said "Redsignal". reorder() sorts the bars by value.
bottom10_other_kill <- other %>%
  filter(other_kill > 0, other_acc > other_kill) %>%
  top_n(-10, kill_per_other_acc) %>%
  ggplot(aes(x = reorder(States_Uts, -kill_per_other_acc),
             y = kill_per_other_acc)) +
  geom_bar(stat = "identity", color = "red", fill = "orange", linetype = "dashed") +
  ggtitle("TOP 10 STATES WITH LOW KILLS DUE TO other") +
  xlab("STATES/UT") +
  ylab("other KILLS") +
  theme_dark()
plot(bottom10_other_kill)

#########################################################################################

# One row per traffic-control type with accident, death and injury totals,
# each summed over all rows of `traffic`.
TRAFFIC_CONTROLS <- data.frame(
  traffic_controls = c(
    "TrafficSignal", "Police Controlled", "Stop Signal",
    "Flash/Blinker", "Uncontrolled Places", "Others"
  ),
  total_accidents = c(
    sum(traffic$tra_sig_acc),
    sum(traffic$pol_acc),
    sum(traffic$stop_acc),
    sum(traffic$blinker_acc),
    sum(traffic$uncont_acc),
    sum(traffic$other_acc)
  ),
  total_kills = c(
    sum(traffic$tra_sig_kill),
    sum(traffic$pol_kill),
    sum(traffic$stop_kill),
    sum(traffic$blinker_kill),
    sum(traffic$uncont_kill),
    sum(traffic$other_kill)
  ),
  total_injuries = c(
    sum(traffic$tra_sig_tot_inj),
    sum(traffic$pol_tot_inj),
    sum(traffic$stop_tot_inj),
    sum(traffic$blinker_tot_inj),
    sum(traffic$uncont_tot_inj),
    sum(traffic$other_tot_inj)
  )
)
# Convert the control-type label to a factor, then show the structure.
TRAFFIC_CONTROLS$traffic_controls <- factor(TRAFFIC_CONTROLS$traffic_controls)
str(TRAFFIC_CONTROLS)
## 'data.frame':    6 obs. of  4 variables:
##  $ traffic_controls: Factor w/ 6 levels "Flash/Blinker",..: 5 3 4 1 6 2
##  $ total_accidents : int  13726 12793 6513 7904 114133 311975
##  $ total_kills     : int  3325 4090 2491 2757 33149 105605
##  $ total_injuries  : int  12468 11519 5665 7378 109344 323044
# Column-wise summary of the per-control-type totals.
summary(object = TRAFFIC_CONTROLS)
##             traffic_controls total_accidents   total_kills     total_injuries  
##  Flash/Blinker      :1       Min.   :  6513   Min.   :  2491   Min.   :  5665  
##  Others             :1       1st Qu.:  9126   1st Qu.:  2899   1st Qu.:  8413  
##  Police Controlled  :1       Median : 13260   Median :  3708   Median : 11994  
##  Stop Signal        :1       Mean   : 77841   Mean   : 25236   Mean   : 78236  
##  TrafficSignal      :1       3rd Qu.: 89031   3rd Qu.: 25884   3rd Qu.: 85125  
##  Uncontrolled Places:1       Max.   :311975   Max.   :105605   Max.   :323044
# Pearson correlation between total accidents and total deaths across the
# six traffic-control types.
with(TRAFFIC_CONTROLS, cor(total_accidents, total_kills, method = "pearson"))
## [1] 0.9985625
# Accidents, deaths and injuries per traffic-control type as three bar traces.
# NOTE: this reassigns acc_bar, replacing the violations chart object of the
# same name created earlier in the file.
acc_bar <- TRAFFIC_CONTROLS %>%
  plot_ly(x = ~traffic_controls) %>%
  add_trace(y = ~total_accidents, name = "ACCIDENTS", type = "bar") %>%
  add_trace(y = ~total_kills, name = "DEATHS", type = "bar") %>%
  add_trace(y = ~total_injuries, name = "INJURY", type = "bar")
acc_bar

# Share of each control type in total accidents.
tc_acc_pie <- plot_ly(
  TRAFFIC_CONTROLS,
  labels = ~traffic_controls, values = ~total_accidents,
  type = "pie", textposition = "inside", textinfo = "label+percent"
)
tc_acc_pie

# Share of each control type in total deaths.
tc_kill_pie <- plot_ly(
  TRAFFIC_CONTROLS,
  labels = ~traffic_controls, values = ~total_kills,
  type = "pie", textposition = "inside", textinfo = "label+percent"
)
tc_kill_pie

# Share of each control type in total injuries.
tc_inj_pie <- plot_ly(
  TRAFFIC_CONTROLS,
  labels = ~traffic_controls, values = ~total_injuries,
  type = "pie", textposition = "inside", textinfo = "label+percent"
)
tc_inj_pie